# set directory
setwd("~/Research/Mycorrhizae/Fieldwork/Genetics/NGS sequencing/Code")

# load packages
library(dada2)
library("phyloseq")
library(ggplot2)
library(tidyverse)
library(fantaxtic)
library(DECIPHER)
library(phangorn)
library(reshape)
library(janitor)
library(multcomp)
library(Rmisc)

# load phyloseq data
load("~/Research/Mycorrhizae/Fieldwork/Genetics/NGS sequencing/Code/phyloseq final variables.RData")

# load exploration type and guild data
load("~/Research/Mycorrhizae/Fieldwork/Genetics/NGS sequencing/Code/RA otu by guild and exploration type.RData")

exploration.guild.RA.full <- exploration.guild.RA
exploration.guild.RA <- exploration.guild.RA.full

# alpha diversity - will use Shannon and Simpson (included richness but don't use for final)
# observed is species richness, and shouldn't be used in final graphs due to issues with singletons
# but actually I read there is no way to measure richness and other papers do it so I will too
# plot overall alpha diversity metrics
plot_richness(physeq, measures=c("Shannon"))

# get overall numbers for alpha diversity
alpha.diversity <- estimate_richness(physeq, measures=c("Observed","Shannon", "Simpson"))
head(alpha.diversity)

########################################################################
# ALPHA DIVERSITY BY HANSEN SIDE # 
#########################################################################

# filter phyloseq object by Hansen stream
physeq_hansen <- subset_samples(physeq, Stream=="Hansen")
physeq_hansen_close <- subset_samples(physeq_hansen, Distance < 7)

# remove decomposing carcass samples
physeq <- subset_samples(physeq_hansen_close, Sample != "G_6_Hansen_S4_SR_6_Org_S77")
physeq <- subset_samples(physeq_hansen_close, Sample != "F_7_Hansen_S4_SR_3_Org_S66")
physeq <- subset_samples(physeq_hansen_close, Sample != "D_4_Hansen_S4_SR_1_Org_S39")

# plot stream specific alpha diversity metrics
p <- plot_richness(physeq_hansen, color="Side", x="Side", measures=c("Observed","Shannon", "Simpson"))
p + geom_boxplot(data = p$data, aes(x = Side, y = value, color = NULL), alpha = 0.1)

# bar graph
ggplot(data = p$data, aes(Side, value, group=Side, fill=Side)) + 
  stat_summary(fun.data=mean_sdl, geom="bar", position=position_dodge(width=0.95)) + 
  facet_wrap(~variable, ncol=3, scales="free") + 
  stat_summary(fun.data=mean_cl_boot, geom="errorbar", 
               position=position_dodge(width=0.95), width=0.3) 

# get numbers for alpha diversity metrics at Hansen
alpha.diversity <- estimate_richness(physeq_hansen_close, measures=c("Observed","Shannon", "Simpson"))
head(alpha.diversity)

# combine phyloseq object with alpha diversity metrics 
data <- cbind(sample_data(physeq_hansen_close), alpha.diversity)

# add depth as potential covariate
data$Depth <- sample_sums(physeq_hansen_close)

# Is dependent variable normally distributed? If so, use ANOVA!
hist(data$Observed)
# if p value below is > 0.05, it is normally distributed!
shapiro.test(data$Observed)
# neither Shannon or Simpson indices are normally distributed, so use wilcox rank sum test, however, Observed is normally distributed so use t.test

# if not normally distributed, do unpaired two-samples Wilcoxon test
hansen_left.observed <- data$Observed[which(data$Side == "SL")]
hansen_right.observed <- data$Observed[which(data$Side == "SR")]
t.test(hansen_right.observed, hansen_left.observed)
wilcox.test(hansen_left.observed, hansen_right.observed, alternative = "two.sided", exact=F)

hansen_left.shannon <- data$Shannon[which(data$Side == "SL")]
hansen_right.shannon <- data$Shannon[which(data$Side == "SR")]
t.test(hansen_right.shannon, hansen_left.shannon)
wilcox.test(hansen_left.shannon, hansen_right.shannon, alternative = "two.sided", exact=F)

hansen_left.simpson <- data$Simpson[which(data$Side == "SL")]
hansen_right.simpson <- data$Simpson[which(data$Side == "SR")]
t.test(hansen_right.simpson, hansen_left.simpson)
wilcox.test(hansen_left.simpson, hansen_right.simpson, alternative = "two.sided", exact=F)

# no difference in observed (richness), Shannon, or Simpson diversity index by close side at Hansen for either 3-6 or 1-6, or full bank

#############################################
# ALPHA DIVERSITY OF SAPROTROPHS AND SYMBIOTROPHS SEPARATELY BY CLOSE HANSEN BANK # 
############################################

# remove right bank distances near decomposing salmon carcass?
exploration.guild.RA <- exploration.guild.RA[-c(42,70,81),]

# or don't include S4 SR 1
exploration.guild.RA <- exploration.guild.RA[-c(70,81),]

#########################################
# test differences in the species richness of fungal guilds by Hansen close bank
#########################################

# close bank (1-6m)
left <- exploration.guild.RA$saprotroph.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Side=="SL")]
right <- exploration.guild.RA$saprotroph.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Side=="SR")]
t.test(left, right)
wilcox.test(left, right, exact=F)

left <- exploration.guild.RA$symbiotroph.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Side=="SL")]
right <- exploration.guild.RA$symbiotroph.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Side=="SR")]
t.test(left, right)
one.way <- aov(symbiotroph.richness ~ Side, data=exploration.guild.RA[which(exploration.guild.RA$Stream== "Hansen" & exploration.guild.RA$Distance < 7),])
summary(one.way)
wilcox.test(left, right, exact=F)
one.way <- lm(symbiotroph.richness ~ Side + eriophorum, data=exploration.guild.RA[which(exploration.guild.RA$Stream== "Hansen" & exploration.guild.RA$Distance < 7),])
summary(one.way)

# CHANGED SYMBIOTROPH RICHNESS HIGHER ON RIGHT BANK
# WAY HIGHER ON BANK WITH NO S4 SR 1,3 AND 6

# close bank (3-6m)
left <- exploration.guild.RA$saprotroph.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SL")]
right <- exploration.guild.RA$saprotroph.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SR")]
t.test(left, right)
wilcox.test(left, right, exact=F)

left <- exploration.guild.RA$symbiotroph.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SL")]
right <- exploration.guild.RA$symbiotroph.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SR")]
t.test(left, right)
wilcox.test(left, right, exact=F)

# CHANGED WITH NO S4 SR 1-6, NOW SYMBIOTROPCH RICHNESS HIGHER ON RIGHT BANK

# full bank
left <- exploration.guild.RA$saprotroph.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SL")]
right <- exploration.guild.RA$saprotroph.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SR")]
t.test(left, right)
wilcox.test(left, right, exact=F)

left <- exploration.guild.RA$symbiotroph.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SL")]
right <- exploration.guild.RA$symbiotroph.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SR")]
t.test(left, right)
wilcox.test(left, right, exact=F)

yako <- exploration.guild.RA[which(exploration.guild.RA$Stream=="Yako"),]
happy <- exploration.guild.RA[which(exploration.guild.RA$Stream=="Happy"),]
hansen <- exploration.guild.RA[which(exploration.guild.RA$Stream=="Hansen"),]
hansen.left <- exploration.guild.RA[which(exploration.guild.RA$Stream=="Hansen" & exploration.guild.RA$Side=="SL"),]
hansen.right <- exploration.guild.RA[which(exploration.guild.RA$Stream=="Hansen" & exploration.guild.RA$Side=="SR"),]

#########################################
# test differences in the species richness of exploration types by Hansen close bank
#########################################

# close bank (1-6m)
hansen.left.close <- exploration.guild.RA$short.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Side=="SL")]
hansen.right.close <- exploration.guild.RA$short.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Side=="SR")]
t.test(hansen.left.close, hansen.right.close)
wilcox.test(hansen.left.close, hansen.right.close, exact=F)

# CHANGED SHORT RICHNESS HIGHER ON RIGHT
# CHANGED AGAIN SHORT RICHNESS HIGHER ON RIGHT

hansen.left.close <- exploration.guild.RA$medium.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SL")]
hansen.right.close <- exploration.guild.RA$medium.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SR")]
t.test(hansen.left.close, hansen.right.close)
one.way <- aov(medium.richness ~ Side, data=exploration.guild.RA[which(exploration.guild.RA$Stream== "Hansen" & exploration.guild.RA$Distance < 7),])
summary(one.way)
wilcox.test(hansen.left.close, hansen.right.close, exact=F)

hansen.left.close <- exploration.guild.RA$long.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Side=="SL")]
hansen.right.close <- exploration.guild.RA$long.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Side=="SR")]
t.test(hansen.left.close, hansen.right.close)
wilcox.test(hansen.left.close, hansen.right.close, exact=F)

# close bank (3-6m)
hansen.left.close <- exploration.guild.RA$short.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SL")]
hansen.right.close <- exploration.guild.RA$short.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SR")]
t.test(hansen.left.close, hansen.right.close)
wilcox.test(hansen.left.close, hansen.right.close, exact=F)

hansen.left.close <- exploration.guild.RA$medium.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SL")]
hansen.right.close <- exploration.guild.RA$medium.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SR")]
t.test(hansen.left.close, hansen.right.close)
wilcox.test(hansen.left.close, hansen.right.close, exact=F)

hansen.left.close <- exploration.guild.RA$long.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SL")]
hansen.right.close <- exploration.guild.RA$long.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SR")]
t.test(hansen.left.close, hansen.right.close)
wilcox.test(hansen.left.close, hansen.right.close, exact=F)

# long distance have higher richness between 3-6 m on left bank!! (with original dataset)

# test by full Hansen bank
hansen.left <- exploration.guild.RA$short.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SL")]
hansen.right <- exploration.guild.RA$short.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SR")]
t.test(hansen.left, hansen.right)
wilcox.test(hansen.left, hansen.right, exact=F)

hansen.left <- exploration.guild.RA$medium.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SL")]
hansen.right <- exploration.guild.RA$medium.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SR")]
t.test(hansen.left, hansen.right)
wilcox.test(hansen.left, hansen.right, exact=F)

hansen.left <- exploration.guild.RA$long.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SL")]
hansen.right <- exploration.guild.RA$long.richness[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SR")]
t.test(hansen.left, hansen.right)
wilcox.test(hansen.left, hansen.right, exact=F)

#########################################
# test differences in the species diversity of fungal guilds by Hansen close bank
#########################################

# close bank (1-6m)
left <- exploration.guild.RA$saprotroph.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Side=="SL")]
right <- exploration.guild.RA$saprotroph.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Side=="SR")]
t.test(left, right)
wilcox.test(left, right, exact=F)

left <- exploration.guild.RA$symbiotroph.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Side=="SL")]
right <- exploration.guild.RA$symbiotroph.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Side=="SR")]
t.test(left, right)
wilcox.test(left, right, exact=F)

# MAYBE symbiotroph diversity is different, higher on the right bank
# CHANGED SYMBIOTROPH DIVERSITY WAY HIGHER ON RIGHT WITH NO S4 SR 1-6

# close bank (3-6m)
left <- exploration.guild.RA$saprotroph.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SL")]
right <- exploration.guild.RA$saprotroph.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SR")]
t.test(left, right)
wilcox.test(left, right, exact=F)

left <- exploration.guild.RA$symbiotroph.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SL")]
right <- exploration.guild.RA$symbiotroph.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SR")]
t.test(left, right)
wilcox.test(left, right, exact=F)

# MAYBE symbiotroph diversity is different, higher on the right bank
# CHANGED SYMBIOTROPH DIVERSITY WAY HIGHER ON RIGHT WITH NO S4 SR 1-6

# full bank
left <- exploration.guild.RA$saprotroph.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SL")]
right <- exploration.guild.RA$saprotroph.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SR")]
t.test(left, right)
wilcox.test(left, right, exact=F)

left <- exploration.guild.RA$symbiotroph.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SL")]
right <- exploration.guild.RA$symbiotroph.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SR")]
t.test(left, right)
wilcox.test(left, right, exact=F)

#########################################
# test differences in the species diversity of exploration types by Hansen close bank
#########################################

# close bank (1-6m)
hansen.left.close <- exploration.guild.RA$short.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Side=="SL")]
hansen.right.close <- exploration.guild.RA$short.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Side=="SR")]
t.test(hansen.left.close, hansen.right.close)
wilcox.test(hansen.left.close, hansen.right.close, exact=F)

# MAYBE short distance diversity higher on right bank

hansen.left.close <- exploration.guild.RA$medium.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Side=="SL")]
hansen.right.close <- exploration.guild.RA$medium.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Side=="SR")]
t.test(hansen.left.close, hansen.right.close)
wilcox.test(hansen.left.close, hansen.right.close, exact=F)

hansen.left.close <- exploration.guild.RA$long.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Side=="SL")]
hansen.right.close <- exploration.guild.RA$long.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Side=="SR")]
t.test(hansen.left.close, hansen.right.close)
wilcox.test(hansen.left.close, hansen.right.close, exact=F)

# CHANGED - LONG DIVERSITY HIGHER ON LEFT BANK

# close bank (3-6m)
hansen.left.close <- exploration.guild.RA$short.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SL")]
hansen.right.close <- exploration.guild.RA$short.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SR")]
t.test(hansen.left.close, hansen.right.close)
wilcox.test(hansen.left.close, hansen.right.close, exact=F)

# short diversity higher on right bank

hansen.left.close <- exploration.guild.RA$medium.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SL")]
hansen.right.close <- exploration.guild.RA$medium.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SR")]
t.test(hansen.left.close, hansen.right.close)
wilcox.test(hansen.left.close, hansen.right.close, exact=F)

hansen.left.close <- exploration.guild.RA$long.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SL")]
hansen.right.close <- exploration.guild.RA$long.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Distance < 7 & exploration.guild.RA$Distance > 1 & exploration.guild.RA$Side=="SR")]
t.test(hansen.left.close, hansen.right.close)
wilcox.test(hansen.left.close, hansen.right.close, exact=F)

# long diversity zero on right bank? much larger on left bank

# test by full Hansen bank
hansen.left <- exploration.guild.RA$short.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SL")]
hansen.right <- exploration.guild.RA$short.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SR")]
t.test(hansen.left, hansen.right)
wilcox.test(hansen.left, hansen.right, exact=F)

# CHANGED - SHORT DIVERSITY HIGHER ON RIGHT SIDE

hansen.left.close <- exploration.guild.RA$long.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SL")]
hansen.right.close <- exploration.guild.RA$long.diversity[which(exploration.guild.RA$Stream == "Hansen" & exploration.guild.RA$Side=="SR")]
t.test(hansen.left, hansen.right)
wilcox.test(hansen.left, hansen.right, exact=F)

# long diversity higher on right

mod1 <- lm(long.diversity ~ C.N, data=hansen.left.close) 
mod2 <- lm(short.diversity ~ C.N, data=exploration.guild.RA) 

mod1 <- lm(log(long.diversity + 1) ~ C.N, data=exploration.guild.RA) 
mod2 <- lm(log(short.diversity+1) ~ C.N, data=exploration.guild.RA) 

# long distance diversity decreases with C:N on left bank but not on right!

summary(mod1)
summary(mod2)

##################################
# GRAPHS #
#################################


####################
# SMALL DISTANCE BY SIDE OF HANSEN # 
###################

# filter phyloseq object by distance at Hansen
physeq_hansen_close <- subset_samples(physeq_hansen, Distance < 11)

# plot stream specific alpha diversity metrics
p <- plot_richness(physeq_hansen_close, color="Side", x="Side", measures=c("Observed","Shannon", "Simpson"))

# boxplot
p + geom_boxplot(data = p$data, aes(x = Side, y = value, color = NULL), alpha = 0.1)

# or bar graph of means with CI for each category computes the mean confidence interval 
# for each category based on a bootstrapping method (a simulation technique to avoid 
# making an assumption about a normal distribution)
ggplot(data = p$data, aes(Side, value, fill=Side)) + 
  stat_summary(fun.data=mean_sdl, geom="bar", position=position_dodge(width=0.95)) + 
  facet_wrap(~variable, ncol=3, scales="free") + 
  stat_summary(fun.data=mean_cl_boot, geom="errorbar", position=position_dodge(width=0.95), width=0.3)

# get numbers for alpha diversity metrics at Hansen
alpha.diversity <- estimate_richness(physeq_hansen_close, measures=c("Observed","Shannon", "Simpson"))
head(alpha.diversity)

# combine phyloseq object with alpha diversity metrics 
data <- cbind(sample_data(physeq_hansen_close), alpha.diversity)

# add depth as potential covariate
data$Depth <- sample_sums(physeq_hansen_close)

# Is dependent variable normally distributed? If so, use ANOVA!
hist(data$Observed)
# if p value below is > 0.05, it is normally distributed!
shapiro.test(data$Observed)
# neither Shannon or Shannon indices are normally distributed, so use wilcox rank sum test
# Observed is normally distributed so can use t.test

# t test for normally distributed Observed
hansen_left.observed <- data$Observed[which(data$Side == "SL")]
hansen_right.observed <- data$Observed[which(data$Side == "SR")]
t.test(hansen_left.observed, hansen_right.observed)

# Wilcoxon rank test for normally distributed Observed
hansen_left.shannon <- data$Shannon[which(data$Side == "SL")]
hansen_right.shannon <- data$Shannon[which(data$Side == "SR")]
wilcox.test(hansen_left.shannon, hansen_right.shannon, alternative = "two.sided", exact=F)

hansen_left.simpson <- data$Simpson[which(data$Side == "SL")]
hansen_right.simpson <- data$Simpson[which(data$Side == "SR")]
wilcox.test(hansen_left.simpson, hansen_right.simpson, alternative = "two.sided", exact=F)

# no difference in observed (richness), Shannon, or Simpson diversity index by close side (<11 m
# AND < 10 AND < 6 m) of Hansen

############
# STREAM #
###########

# plot stream specific alpha diversity metrics
p <- plot_richness(physeq, color="Stream", x="Stream", measures=c("Observed","Shannon", "Simpson"))
p + geom_boxplot(data = p$data, aes(x = Stream, y = value, color = NULL), alpha = 0.1)

# or bar graph of means with CI for each category computes the mean confidence interval 
# for each category based on a bootstrapping method (a simulation technique to avoid 
# making an assumption about a normal distribution)
ggplot(data = p$data, aes(Stream, value, group=Stream, fill=Stream)) + 
  stat_summary(fun.data=mean_sdl, geom="bar", position=position_dodge(width=0.95)) + 
  facet_wrap(~variable, ncol=3, scales="free") + 
  stat_summary(fun.data=mean_cl_boot, geom="errorbar", 
               position=position_dodge(width=0.95), width=0.3) 

# combine phyloseq object with alpha diversity metrics 
data <- cbind(sample_data(physeq), alpha.diversity)

# add depth as potential covariate
data$Depth <- sample_sums(physeq)

# Is dependent variable normally distributed? If so, use ANOVA!
hist(data$Simpson)
# if p value below is > 0.05, it is normally distributed!
shapiro.test(data$Simpson)
# neither Shannon, Simpson, oe Observed indices are normally distributed, so use wilcox rank sum test

# detect significant differences in means of alpha diversity by stream using ANOVA
#data$Stream <- as.factor(data$Stream)
#richness.anova <- aov(Observed ~ Stream, data)
#summary(richness.anova) ## Depth is very significant

# tukey test tells you which means are significantlly different
#TukeyHSD(richness.anova, conf.level=.95)

# we can do non-parametric test anyways to confirm, do unpaired two-samples Wilcoxon test
hansen.observed <- data$Observed[which(data$Stream == "Hansen")]
happy.observed <- data$Observed[which(data$Stream == "Happy")]
yako.observed <- data$Observed[which(data$Stream == "Yako")]
wilcox.test(hansen.observed, happy.observed, alternative = "two.sided", exact=F)
wilcox.test(hansen.observed, yako.observed, alternative = "two.sided", exact=F)
wilcox.test(happy.observed, yako.observed, alternative = "two.sided", exact=F)

hansen.shannon <- data$Shannon[which(data$Stream == "Hansen")]
happy.shannon <- data$Shannon[which(data$Stream == "Happy")]
yako.shannon <- data$Shannon[which(data$Stream == "Yako")]
wilcox.test(hansen.shannon, happy.shannon, alternative = "two.sided", exact=F)
wilcox.test(hansen.shannon, yako.shannon, alternative = "two.sided", exact=F) # SIGNIFICANT
wilcox.test(happy.shannon, yako.shannon, alternative = "two.sided", exact=F)

hansen.simpson <- data$Simpson[which(data$Stream == "Hansen")]
happy.simpson <- data$Simpson[which(data$Stream == "Happy")]
yako.simpson <- data$Simpson[which(data$Stream == "Yako")]
wilcox.test(hansen.simpson, happy.simpson, alternative = "two.sided", exact=F)
wilcox.test(hansen.simpson, yako.simpson, alternative = "two.sided", exact=F) # SIGNIFICANT
wilcox.test(happy.simpson, yako.simpson, alternative = "two.sided", exact=F)

# significant difference in Shannon and Simpson diversity index by stream

###################
# BY TRANSECT AT HANSEN # 
##################

# plot Hansen stream specific alpha diversity metrics by transect
p <- plot_richness(physeq_hansen, color="Transect", x="Transect", 
                   measures=c("Observed","Shannon", "Simpson"))

# boxplot
p + geom_boxplot(data = p$data, aes(x = Transect, y = value, color = NULL), alpha = 0.1)

# or bar graph of means with CI for each category computes the mean confidence interval 
# for each category based on a bootstrapping method (a simulation technique to avoid 
# making an assumption about a normal distribution)
ggplot(data = p$data, aes(Transect, value, fill=Transect)) + 
  stat_summary(fun.data=mean_sdl, geom="bar", position=position_dodge(width=0.95)) + 
  facet_wrap(~variable, ncol=3, scales="free") + 
  stat_summary(fun.data=mean_cl_boot, geom="errorbar", position=position_dodge(width=0.95), width=0.3)

# get numbers for alpha diversity metrics at Hansen
alpha.diversity <- estimate_richness(physeq_hansen, measures=c("Observed","Shannon", "Simpson"))
head(alpha.diversity)

# combine phyloseq object with alpha diversity metrics 
data <- cbind(sample_data(physeq_hansen), alpha.diversity)

# add depth as potential covariate
data$Depth <- sample_sums(physeq_hansen)

# Is dependent variable normally distributed? If so, use ANOVA!
hist(data$Simpson)
# if p value below is > 0.05, it is normally distributed!
shapiro.test(data$Simpson)

# Shannon, Simpson and observed are not normally distributed, so use Wilcoxon test 

# filter values of each diversity metric for testing
hansen_S1.observed <- data$Observed[which(data$Transect == "S1")]
hansen_S4.observed <- data$Observed[which(data$Transect == "S4")]
hansen_S6.observed <- data$Observed[which(data$Transect == "S6")]

# if not normally distributed, do unpaired two-samples Wilcoxon test
wilcox.test(hansen_S1.observed, hansen_S4.observed, alternative = "two.sided", exact=F)
wilcox.test(hansen_S1.observed, hansen_S6.observed, alternative = "two.sided", exact=F)
wilcox.test(hansen_S4.observed, hansen_S6.observed, alternative = "two.sided", exact=F)

hansen_S1.shannon <- data$Shannon[which(data$Transect == "S1")]
hansen_S4.shannon <- data$Shannon[which(data$Transect == "S4")]
hansen_S6.shannon <- data$Shannon[which(data$Transect == "S6")]
wilcox.test(hansen_S1.shannon, hansen_S4.shannon, alternative = "two.sided", exact=F)
wilcox.test(hansen_S1.shannon, hansen_S6.shannon, alternative = "two.sided", exact=F)
wilcox.test(hansen_S4.shannon, hansen_S6.shannon, alternative = "two.sided", exact=F)

hansen_S1.simpson <- data$Simpson[which(data$Transect == "S1")]
hansen_S4.simpson <- data$Simpson[which(data$Transect == "S4")]
hansen_S6.simpson <- data$Simpson[which(data$Transect == "S6")]
wilcox.test(hansen_S1.simpson, hansen_S4.simpson, alternative = "two.sided", exact=F)
wilcox.test(hansen_S1.simpson, hansen_S6.simpson, alternative = "two.sided", exact=F)
wilcox.test(hansen_S4.simpson, hansen_S6.simpson, alternative = "two.sided", exact=F)

# no differences in richness, Shannon, or Simpson between transects at Hansen

#######################
# TRANSECT BY ALL STREAMS # 
##########################

# plot transect specific alpha diversity metrics
p <- plot_richness(physeq, color="Transect", x="Transect", measures=c("Observed","Shannon", "Simpson"))
p + geom_boxplot(data = p$data, aes(x = Transect, y = value, color = NULL), alpha = 0.1)

# or bar graph of means with CI for each category computes the mean confidence interval 
# for each category based on a bootstrapping method (a simulation technique to avoid 
# making an assumption about a normal distribution)
ggplot(data = p$data, aes(Transect, value, fill=Transect)) + 
  stat_summary(fun.data=mean_sdl, geom="bar", position=position_dodge(width=0.95)) + 
  facet_wrap(~variable, ncol=3, scales="free") + 
  stat_summary(fun.data=mean_cl_boot, geom="errorbar", position=position_dodge(width=0.95), width=0.3)

####
# looks like there are some large differences at Happy and Yako, let's look at those separately

###################
# BY TRANSECT AT HAPPY # 
##################

# filter phyloseq object by Happy stream
physeq_happy <- subset_samples(physeq, Stream=="Happy")

# plot Happy stream specific alpha diversity metrics by transect
p <- plot_richness(physeq_happy, color="Transect", x="Transect", 
                   measures=c("Observed","Shannon", "Simpson"))

# boxplot
p + geom_boxplot(data = p$data, aes(x = Transect, y = value, color = NULL), alpha = 0.1)

# or bar graph of means with CI for each category computes the mean confidence interval 
# for each category based on a bootstrapping method (a simulation technique to avoid 
# making an assumption about a normal distribution)
ggplot(data = p$data, aes(Transect, value, fill=Transect)) + 
  stat_summary(fun.data=mean_sdl, geom="bar", position=position_dodge(width=0.95)) + 
  facet_wrap(~variable, ncol=3, scales="free") + 
  stat_summary(fun.data=mean_cl_boot, geom="errorbar", position=position_dodge(width=0.95), width=0.3)

# get numbers for alpha diversity metrics at Hansen
alpha.diversity <- estimate_richness(physeq_happy, measures=c("Observed","Shannon", "Simpson"))
head(alpha.diversity)

# combine phyloseq object with alpha diversity metrics 
data <- cbind(sample_data(physeq_happy), alpha.diversity)

# add depth as potential covariate
data$Depth <- sample_sums(physeq_happy)

# Is dependent variable normally distributed? If so, use ANOVA!
hist(data$Simpson)
# if p value below is > 0.05, it is normally distributed!
shapiro.test(data$Simpson)

# Observed is normally distributed, so use Welch's test
happy_h1.observed <- data$Observed[which(data$Transect == "H1")]
happy_h2.observed <- data$Observed[which(data$Transect == "H2")]
happy_h3.observed <- data$Observed[which(data$Transect == "H3")]
t.test(happy_h1.observed, happy_h2.observed)
t.test(happy_h1.observed, happy_h3.observed)
t.test(happy_h2.observed, happy_h3.observed)

# Shannon and Simpson are not normally distributed, do unpaired two-samples Wilcoxon test
happy_h1.shannon <- data$Shannon[which(data$Transect == "H1")]
happy_h2.shannon <- data$Shannon[which(data$Transect == "H2")]
happy_h3.shannon <- data$Shannon[which(data$Transect == "H3")]
wilcox.test(happy_h1.shannon, happy_h2.shannon, alternative = "two.sided", exact=F)
wilcox.test(happy_h1.shannon, happy_h3.shannon, alternative = "two.sided", exact=F)
wilcox.test(happy_h2.shannon, happy_h3.shannon, alternative = "two.sided", exact=F)

happy_h1.simpson <- data$Simpson[which(data$Transect == "H1")]
happy_h2.simpson <- data$Simpson[which(data$Transect == "H2")]
happy_h3.simpson <- data$Simpson[which(data$Transect == "H3")]
wilcox.test(happy_h1.simpson, happy_h2.simpson, alternative = "two.sided", exact=F)
wilcox.test(happy_h1.simpson, happy_h3.simpson, alternative = "two.sided", exact=F)
wilcox.test(happy_h2.simpson, happy_h3.simpson, alternative = "two.sided", exact=F)

# observed and Shannon ALMOST different between H2 and H3!!
# no difference in Simpson index between transects

###################
# BY TRANSECT AT YAKO # 
##################

# filter phyloseq object by Happy stream
physeq_yako <- subset_samples(physeq, Stream=="Yako")

# plot Yako stream specific alpha diversity metrics by transect
p <- plot_richness(physeq_yako, color="Transect", x="Transect", 
                   measures=c("Observed","Shannon", "Simpson"))

# boxplot
p + geom_boxplot(data = p$data, aes(x = Transect, y = value, color = NULL), alpha = 0.1)

# or bar graph of means with CI for each category computes the mean confidence interval 
# for each category based on a bootstrapping method (a simulation technique to avoid 
# making an assumption about a normal distribution)
ggplot(data = p$data, aes(Transect, value, fill=Transect)) + 
  stat_summary(fun.data=mean_sdl, geom="bar", position=position_dodge(width=0.95)) + 
  facet_wrap(~variable, ncol=3, scales="free") + 
  stat_summary(fun.data=mean_cl_boot, geom="errorbar", position=position_dodge(width=0.95), width=0.3)

# get numbers for alpha diversity metrics at Hansen
alpha.diversity <- estimate_richness(physeq_yako, measures=c("Observed","Shannon", "Simpson"))
head(alpha.diversity)

# combine phyloseq object with alpha diversity metrics 
data <- cbind(sample_data(physeq_yako), alpha.diversity)

# add depth as potential covariate
data$Depth <- sample_sums(physeq_yako)

# Is dependent variable normally distributed? If so, use ANOVA!
hist(data$Simpson)
# if p value below is > 0.05, it is normally distributed!
shapiro.test(data$Simpson)

# Observed and Shannon are normally distributed, so use Welch's test
yako_y1.observed <- data$Observed[which(data$Transect == "Y1")]
yako_y2.observed <- data$Observed[which(data$Transect == "Y2")]
yako_y3.observed <- data$Observed[which(data$Transect == "Y3")]
t.test(yako_y1.observed, yako_y2.observed) # SIGNIFICANT
t.test(yako_y1.observed, yako_y3.observed) # SIGNIFICANT
t.test(yako_y2.observed, yako_y3.observed)

yako_y1.shannon <- data$Shannon[which(data$Transect == "Y1")]
yako_y2.shannon <- data$Shannon[which(data$Transect == "Y2")]
yako_y3.shannon <- data$Shannon[which(data$Transect == "Y3")]
t.test(yako_y1.shannon, yako_y2.shannon) 
t.test(yako_y1.shannon, yako_y3.shannon) 
t.test(yako_y2.shannon, yako_y3.shannon)

# Simpson is not normally distributed, do unpaired two-samples Wilcoxon test
yako_y1.simpson <- data$Simpson[which(data$Transect == "Y1")]
yako_y2.simpson <- data$Simpson[which(data$Transect == "Y2")]
yako_y3.simpson <- data$Simpson[which(data$Transect == "Y3")]
wilcox.test(yako_y1.simpson, yako_y2.simpson, alternative = "two.sided", exact=F)
wilcox.test(yako_y1.simpson, yako_y3.simpson, alternative = "two.sided", exact=F)
wilcox.test(yako_y2.simpson, yako_y3.simpson, alternative = "two.sided", exact=F)

# observed richness is significantly different between Y1 and Y2, and Y1 and Y3
# no different in Shannon and Simpson index between transects

